﻿# encoding=utf-8
import requests
from bs4 import BeautifulSoup
def getConnection(url):
    """Fetch *url* and return the page parsed as a BeautifulSoup tree.

    wenxuemi.com serves GBK-encoded pages without declaring the charset
    in its HTTP headers, so requests falls back to ISO-8859-1 and
    `r.text` comes out mojibake.  Instead of round-tripping the text
    through latin-1 -> gbk -> utf-8 by hand (which handed BeautifulSoup
    raw bytes and relied on it re-sniffing the encoding), tell requests
    the real charset and let it decode once, correctly.

    :param url: page URL to fetch
    :return: BeautifulSoup tree of the decoded HTML
    :raises requests.RequestException: on connection failure or timeout
    """
    r = requests.get(url, timeout=30)  # timeout so a dead server can't hang us forever
    if 'wenxuemi' in url:
        # Override the wrongly-guessed ISO-8859-1 with the site's real charset.
        r.encoding = 'gbk'
    return BeautifulSoup(r.text, 'html.parser')
def _print_chapter(url):
    """Fetch one wenxuemi chapter page and print its heading and body text.

    The chapter text lives in <div id="content">; the heading is the
    page's single <h1>.
    """
    soup = getConnection(url)
    for div in soup.find_all('div'):
        if div.get('id') == 'content':
            print(soup.find("h1").string)
            for child in div.children:
                print(child)


if __name__ == '__main__':
    WENXUEMI_INDEX = 'https://www.wenxuemi.com/files/article/html/19/19358/'
    # Step 1: scan the NetEase Yuedu source page for a freshly-posted chapter.
    soup = getConnection("http://guofeng.yuedu.163.com/source/9cc63831542a4bd5aca2c6e4ca7bd05b_4")
    # Most recent chapter title seen so far.  The original left this
    # unbound until a heading containing '章' appeared, risking a
    # NameError further down.
    title = None
    for h4 in soup.find_all("h4"):
        if h4.parent.name != 'a':
            continue
        print(h4.string)
        if h4.string is not None and u'章' in h4.string:
            title = h4.string
        chapter = h4.parent.find_next('span').string
        print(chapter)
        # Chinese literals need the u'' prefix under Python 2 to avoid
        # ASCII comparison errors; see
        # https://blog.csdn.net/use_my_heart/article/details/51303317
        # A timestamp like "今天…" or "…分钟前" marks a same-day update.
        # `.string` may be None for nested markup, so guard before `in`.
        if chapter is None or (u'今天' not in chapter and u'分' not in chapter):
            continue
        if title is None:
            continue  # update detected but no chapter title to match against
        print('*******************************')
        print("网易云阅读更新了")
        print("文学迷")
        # Step 2: look for the same chapter title in wenxuemi's index.
        soup = getConnection(WENXUEMI_INDEX)
        entries = soup.find_all("dd")
        for f in range(len(entries)):
            link = entries[f].a
            # Guard both a missing <a> and an <a> with no direct text.
            if link is None or link.string is None:
                continue
            if title in link.string:
                print("************************")
                print("文学迷也更新啦")
                # wenxuemi posts two chapters per update; the entry
                # before the match is the first chapter of the day.
                href_previous = entries[f - 1].a.attrs['href'].split('/')[-1]
                print("今天更新的第一章")
                _print_chapter(WENXUEMI_INDEX + href_previous)
                href = link.attrs['href'].split('/')[-1]
                print("今天更新的第二章")
                _print_chapter(WENXUEMI_INDEX + href)






